library(tidyverse) # for data cleaning and plotting
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(googlesheets4) # for reading googlesheet data
library(lubridate) # for date manipulation
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(openintro) # for the abbr2state() function
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(palmerpenguins)# for Palmer penguin data
library(maps) # for map data
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
library(ggmap) # for mapping points on maps
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.
library(gplots) # for col2hex() function
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(RColorBrewer) # for color palettes
library(sf) # for working with spatial data
## Linking to GEOS 3.4.2, GDAL 2.4.2, PROJ 4.8.0
library(leaflet) # for highly customizable mapping
library(ggthemes) # for more themes (including theme_map())
library(plotly) # for the ggplotly() - basic interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggmap':
##
## wind
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gganimate) # for adding animation layers to ggplots
library(transformr) # for "tweening" (gganimate)
##
## Attaching package: 'transformr'
## The following object is masked from 'package:sf':
##
## st_normalize
library(shiny) # for creating interactive apps
library(gifski)
gs4_deauth() # Turn off Google authorization so knitting never prompts for a login.
theme_set(theme_minimal()) # Default ggplot2 theme for the plots in this document.

# Every dataset below is downloaded when the document is knit. Each URL is
# piped into its reader; the `##` lines are the console output of the call
# directly above them.

# SNCF train data
small_trains <-
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## year = col_double(),
## month = col_double(),
## service = col_character(),
## departure_station = col_character(),
## arrival_station = col_character(),
## journey_time_avg = col_double(),
## total_num_trips = col_double(),
## avg_delay_all_departing = col_double(),
## avg_delay_all_arriving = col_double(),
## num_late_at_departure = col_double(),
## num_arriving_late = col_double(),
## delay_cause = col_character(),
## delayed_number = col_double()
## )

# Lisa's garden data; `date` is run through ymd() after reading.
garden_harvest <-
  "https://docs.google.com/spreadsheets/d/1DekSazCzKqPS2jnGhKue7tLxRU3GVL1oxi-4bEM5IWw/edit?usp=sharing" %>%
  read_sheet() %>%
  mutate(date = ymd(date))
## Reading from "2020_harvest"
## Range "Sheet1"

# Lisa's Mallorca cycling data; only the first four columns plus `speed` are kept.
mallorca_bike_day7 <-
  "https://www.dropbox.com/s/zc6jan4ltmjtvy0/mallorca_bike_day7.csv?dl=1" %>%
  read_csv() %>%
  select(1:4, speed)
## Parsed with column specification:
## cols(
## lon = col_double(),
## lat = col_double(),
## ele = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## ele.num = col_double(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = ""),
## time_hr = col_double(),
## dist_km = col_double(),
## speed = col_double()
## )

# Heather Lendway's Ironman 70.3 Pan Am championships Panama data,
# one file per discipline.
panama_swim <-
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_swim_20160131.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## lon = col_double(),
## lat = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## ele = col_logical(),
## event = col_character(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = "")
## )
panama_bike <-
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_bike_20160131.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## lon = col_double(),
## lat = col_double(),
## ele = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## event = col_character(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = "")
## )
panama_run <-
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_run_20160131.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## lon = col_double(),
## lat = col_double(),
## ele = col_double(),
## time = col_datetime(format = ""),
## extensions = col_double(),
## event = col_character(),
## date = col_date(format = ""),
## hrminsec = col_datetime(format = "")
## )

# COVID-19 data from the New York Times
covid19 <-
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv" %>%
  read_csv()
## Parsed with column specification:
## cols(
## date = col_date(format = ""),
## state = col_character(),
## fips = col_character(),
## cases = col_double(),
## deaths = col_double()
## )

# Bike-share trip history, read from a remote .rds file through a
# decompressing connection.
data_site <- "https://www.macalester.edu/~dshuman1/data/112/2014-Q4-Trips-History-Data.rds"
Trips <- data_site %>%
  url() %>%
  gzcon() %>%
  readRDS()
Put your homework on GitHub!
Go here or to previous homework to remind yourself how to get set up.
Once your repository is created, you should always open your project rather than just opening an .Rmd file. You can do that by either clicking on the .Rproj file in your repository folder on your computer. Or, by going to the upper right hand corner in R Studio and clicking the arrow next to where it says Project: (None). You should see your project come up in that list if you’ve used it recently. You could also go to File –> Open Project and navigate to your .Rproj file.
Instructions
Put your name at the top of the document.
For ALL graphs, you should include appropriate labels.
Feel free to change the default theme, which I currently have set to theme_minimal().
Use good coding practice. Read the short sections on good code with pipes and ggplot2. This is part of your grade!
NEW!! With animated graphs, add eval=FALSE to the code chunk that creates the animation and saves it using anim_save(). Add another code chunk to reread the gif back into the file. See the tutorial for help.
When you are finished with ALL the exercises, uncomment the options at the top so your document looks nicer. Don’t do it before then, or else you might miss some important warnings and messages.
Warm-up exercises from tutorial
- Choose 2 graphs you have created for ANY assignment in this class and add interactivity using the
ggplotly() function.
# Interactive line chart of cumulative COVID-19 cases for Minnesota and its
# neighbouring states.
covid_graph_1 <- covid19 %>%
  filter(state %in% c("Minnesota",
                      "Wisconsin",
                      "Iowa",
                      "North Dakota",
                      "South Dakota")) %>%
  # `cases` is already the cumulative count for each state and date, so it is
  # plotted directly; running cumsum() over it would inflate every value.
  ggplot(aes(x = date, y = cases, color = state)) +
  geom_line() +
  labs(title = "Covid19 Cases Over Time",
       x = "Date",
       y = "Total Cases",
       color = "State")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(covid_graph_1)
# Interactive density plot of trip start times across the day, one panel per
# weekday, with one filled curve per client type.
bike_graph_1 <- Trips %>%
  mutate(
    # Start time as a decimal hour of the day.
    time_of_day = hour(sdate) + minute(sdate) / 60,
    # `label = TRUE` belongs to wday() so the facets are titled with weekday
    # names; outside the call it only created a constant column named `label`.
    week_day = wday(sdate, label = TRUE)
  ) %>%
  ggplot(aes(x = time_of_day)) +
  # A fixed transparency is a setting, not a mapping, so it sits outside aes()
  # and no alpha legend is produced.
  geom_density(aes(fill = client), alpha = 0.5, color = NA) +
  facet_wrap(vars(week_day)) +
  theme(axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_blank()) +
  labs(title = "Trips by Casual and Registered Users over 1 Day",
       x = "Time of Day",
       y = "Proportion of Daily Trips",
       fill = "Client Type")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(bike_graph_1)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
- Use animation to tell an interesting story with the
small_trains dataset that contains data from the SNCF (National Society of French Railways). These are Tidy Tuesday data! Read more about it here.
I’ve used animation to look at the proportion of trains from different stations that arrive late at Paris Lyon station, ordered by mean journey time, to see whether longer journeys are more likely to arrive late. The graph is somewhat inconclusive, and fully answering the question could require more knowledge about the local rail system.
# One row per departure station for trains arriving at PARIS LYON: the mean of
# the reported average journey times and the overall share of trips that
# arrived late.
paris_station_late_arr <- small_trains %>%
  filter(arrival_station == "PARIS LYON") %>%
  group_by(departure_station) %>%
  # summarize() collapses each station to a single row. A grouped mutate()
  # would keep every original row, so each station would be counted once per
  # row when the smoother is fitted.
  summarize(journey_time_avg = mean(journey_time_avg),
            prop_arriving_late = sum(num_arriving_late) / sum(total_num_trips)) %>%
  arrange(desc(journey_time_avg))

# Smoothed late-arrival proportion against journey length, revealed from the
# shortest to the longest journeys.
train_graph <- paris_station_late_arr %>%
  ggplot(aes(x = journey_time_avg,
             y = prop_arriving_late)) +
  geom_smooth() +
  labs(title = "Late Arrivals to Paris-Lyon by Journey Length from Departure Station",
       x = "Mean Journey Length from Departure Station (mins)",
       y = "Proportion of Late Arrivals") +
  transition_reveal(journey_time_avg)

animate(train_graph, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("frenchtrains.gif")
Garden data
- In this exercise, you will create a stacked area plot that reveals itself over time (see the
geom_area() examples here). You will look at cumulative harvest of tomato varieties over time. You should do the following:
- From the
garden_harvest data, filter the data to the tomatoes and find the daily harvest in pounds for each variety.
- Then, for each variety, find the cumulative harvest in pounds.
- Use the data you just made to create a static cumulative harvest area plot, with the areas filled with different colors for each vegetable and arranged (HINT:
fct_reorder()) from most to least harvested (most on the bottom).
- Add animation to reveal the plot over date.
# Daily and cumulative tomato harvest in pounds for each variety.
cum_tomato_harvest <- garden_harvest %>%
  filter(vegetable == "tomatoes") %>%
  # Give every variety a row for every harvest date (weight 0 when nothing
  # was picked) so each stacked area spans the full date range.
  complete(variety, date, fill = list(weight = 0)) %>%
  group_by(variety, date) %>%
  # 0.00220462 is pounds per gram (assumes `weight` is recorded in grams).
  # Only the `date` grouping is dropped, so cumsum() below runs per variety.
  summarize(daily_weight_lbs = sum(weight * 0.00220462),
            .groups = "drop_last") %>%
  mutate(cum_weight_lbs = cumsum(daily_weight_lbs)) %>%
  ungroup() %>%
  # Order varieties by their final cumulative total (the maximum) rather than
  # by the default median, so the most-harvested variety is the last level.
  mutate(variety = fct_reorder(variety, cum_weight_lbs, .fun = max))

# Stacked area plot of the cumulative harvest, revealed over the season.
tomato_graph <- cum_tomato_harvest %>%
  ggplot(aes(x = date, y = cum_weight_lbs, fill = variety)) +
  geom_area() +
  labs(title = "Cumulative Harvest of Tomatoes",
       x = "Date",
       y = "Cumulative Weight (lbs)",  # lower-case `y`; `Y` is not an aesthetic
       fill = "Variety") +
  transition_reveal(date)

animate(tomato_graph, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("tomatoes.gif")
Maps, animation, and movement!
- Map my
mallorca_bike_day7 bike ride using animation! Requirements:
- Plot on a map using
ggmap.
- Show “current” location with a red point.
- Show path up until the current point.
- Color the path according to elevation.
- Show the time in the subtitle.
- CHALLENGE: use the
ggimage package and geom_image to add a bike image instead of a red point. You can use this image. See here for an example.
- Add something of your own! And comment on if you prefer this to the static map and why or why not.
# Background map tiles for the area of the ride.
# NOTE(review): Stamen-hosted tiles have moved to another provider; confirm
# get_stamenmap() still works with the installed ggmap version.
mallorca_map <- get_stamenmap(
  bbox = c(left = 2.3, bottom = 39.5, right = 2.8, top = 39.8),
  maptype = "terrain",
  zoom = 12
)

# Animated ride: the path builds up over time and is coloured by elevation,
# a red point marks the current location, and the subtitle shows the time.
bike_ride_gif <- ggmap(mallorca_map) +
  geom_path(data = mallorca_bike_day7,
            aes(x = lon, y = lat, color = ele),
            size = 3) +
  # Under transition_reveal() a point layer shows the position at the current
  # frame instead of accumulating like the path does.
  geom_point(data = mallorca_bike_day7,
             aes(x = lon, y = lat),
             color = "red",
             size = 4) +
  scale_color_viridis_c(option = "inferno") +
  labs(title = "Mallorca Bike Ride Elevation Over Time",
       subtitle = "Time: {frame_along}",
       color = "Elevation") +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_reveal(time)

animate(bike_ride_gif, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("bikeride.gif")
- In this exercise, you get to meet my sister, Heather! She is a proud Mac grad, currently works as a Data Scientist at 3M where she uses R everyday, and for a few years (while still holding a full-time job) she was a pro triathlete. You are going to map one of her races. The data from each discipline of the Ironman 70.3 Pan Am championships, Panama is in a separate file -
panama_swim, panama_bike, and panama_run. Create a similar map to the one you created with my cycling data. You will need to make some small changes: 1. combine the files (HINT: bind_rows()), 2. make the leading dot a different color depending on the event (for an extra challenge, make it a different image using geom_image()!), 3. CHALLENGE (optional): color by speed, which you will need to compute on your own from the data. You can read Heather’s race report here. She is also in the Macalester Athletics Hall of Fame and still has records at the pool.
# Stack the three disciplines into one data frame; the `event` column tells
# the rows apart.
panama_ironman <- bind_rows(panama_swim, panama_bike, panama_run)

# Background map tiles for the race area.
# NOTE(review): Stamen-hosted tiles have moved to another provider; confirm
# get_stamenmap() still works with the installed ggmap version.
panama_map <- get_stamenmap(
  bbox = c(left = -79.59, bottom = 8.91, right = -79.49, top = 9),
  maptype = "terrain",
  zoom = 13
)

# Animated race route: the path and its leading point are coloured by event,
# and the subtitle shows the time.
ironman_gif <- ggmap(panama_map) +
  geom_path(data = panama_ironman,
            aes(x = lon, y = lat, color = event),
            size = 2) +
  # Under transition_reveal() a point layer shows the position at the current
  # frame instead of accumulating like the path does.
  geom_point(data = panama_ironman,
             aes(x = lon, y = lat, color = event),
             size = 4) +
  labs(title = "Route of Pan Am Ironman Championship",
       subtitle = "Time: {frame_along}",
       color = "Event") +
  theme_map() +
  transition_reveal(time)

animate(ironman_gif, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("ironman.gif")
COVID-19 data
- In this exercise, you are going to replicate many of the features in this visualization by Aatish Bhatia but include all US states. Requirements:
- Create a new variable that computes the number of new cases in the past week (HINT: use the
lag() function you’ve used in a previous set of exercises). Replace missing values with 0’s using replace_na().
- Filter the data to omit rows where the cumulative case counts are less than 20.
- Create a static plot with cumulative cases on the x-axis and new cases in the past 7 days on the y-axis. Connect the points for each state over time. HINTS: use
geom_path() and add a group aesthetic. Put the x and y axis on the log scale and make the tick labels look nice - scales::comma is one option. This plot will look pretty ugly as is.
- Animate the plot to reveal the pattern by date. Display the date as the subtitle. Add a leading point to each state’s line (
geom_point()) and add the state name as a label (geom_text() - you should look at the check_overlap argument).
- Use the
animate() function to have 200 frames in your animation and make it 30 seconds long.
- Comment on what you observe.
In this graph we can observe that although New York used to have by far the most new weekly cases, Florida and California now have more, with Florida far in the lead.
# Animated trend plot: cumulative cases (x) against cases added over the
# previous 7 days (y), one path per state, both axes on a log10 scale.
covid_path <- covid19 %>%
  group_by(state) %>%
  # `cases` is the cumulative count, so the new cases of the past week are the
  # difference from the value 7 rows earlier within the state. The first 7
  # rows of a state have nothing to subtract and are set to 0.
  # Assumes one row per state per day.
  mutate(new_cases_7day = replace_na(cases - lag(cases, n = 7, order_by = date), 0)) %>%
  ungroup() %>%
  # Omit rows with fewer than 20 cumulative cases.
  filter(cases >= 20) %>%
  ggplot(aes(x = cases, y = new_cases_7day, color = state)) +
  geom_path() +
  geom_point(size = 2) +
  geom_text(aes(label = state),
            check_overlap = TRUE) +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10(labels = scales::comma) +
  labs(title = "COVID19 Cases by State",
       subtitle = "Date: {frame_along}",
       x = "Cumulative cases",
       y = "New cases in the past 7 days") +
  # The state names are drawn on the plot, so the colour legend is dropped.
  theme(legend.position = "none") +
  transition_reveal(date)

animate(covid_path, nframes = 200, duration = 30, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("covid_path.gif")
- In this exercise you will animate a map of the US, showing how cumulative COVID-19 cases per 10,000 residents has changed over time. This is similar to exercises 11 & 12 from the previous exercises, with the added animation! So, in the end, you should have something like the static map you made there, but animated over all the days. Put date in the subtitle. Comment on what you see.
In this map you can see that although Washington was the first state with a confirmed case, first New York and then Louisiana became the centers of the pandemic, with the center of the country rapidly increasing thereafter.
# 2018 state population estimates. separate() splits off whatever precedes the
# first non-alphanumeric character of `state` into a throwaway `dot` column
# (presumably the names carry a leading "." -- verify against the file);
# `extra = "merge"` keeps multi-word names in one piece. Names are lower-cased
# to match the map data.
census_pop_est_2018 <- read_csv("https://www.dropbox.com/s/6txwv3b4ng7pepe/us_census_2018_state_pop_est.csv?dl=1") %>%
  separate(state, into = c("dot", "state"), extra = "merge") %>%
  select(-dot) %>%
  mutate(state = str_to_lower(state))

# State outlines; geom_map() matches them to the data through `map_id`.
us_map <- map_data("state")

# Animated choropleth of cumulative cases per 10,000 residents, one animation
# state per Friday.
covid_map <- covid19 %>%
  group_by(state) %>%
  # Fill in any missing days within each state's own date range. The grouping
  # column is not passed to complete(): the data is already grouped by it, and
  # tidyr 1.2.0 and later reject completing a grouping column.
  # Added days have NA `cases`.
  complete(date = seq.Date(min(date), max(date), by = "day")) %>%
  ungroup() %>%
  # One frame per week keeps the number of animation states manageable.
  filter(wday(date, label = TRUE) == "Fri") %>%
  mutate(state = str_to_lower(state)) %>%
  # Keep only the states present in the census table.
  right_join(census_pop_est_2018,
             by = "state") %>%
  mutate(cases_per_10000 = cases / est_pop_2018 * 10000) %>%
  ggplot() +
  geom_map(aes(map_id = state,
               fill = cases_per_10000,
               group = date),
           map = us_map) +
  # geom_map() does not set axis limits itself, so take them from the map.
  expand_limits(x = us_map$long, y = us_map$lat) +
  scale_fill_viridis_c(option = "viridis") +
  labs(title = "Covid Cases per 10,000",
       subtitle = "Date: {closest_state}",
       fill = "Cases") +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_states(date)

animate(covid_map, nframes = 100, duration = 30, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("covid_map.gif")
Your first shiny app
- This app will also use the COVID data. Make sure you load that data and all the libraries you need in the
app.R file you create. Below, you will post a link to the app that you publish on shinyapps.io. You will create an app to compare states’ cumulative number of COVID cases over time. The x-axis will be number of days since 20+ cases and the y-axis will be cumulative cases on the log scale (scale_y_log10()). We use number of days since 20+ cases on the x-axis so we can make better comparisons of the curve trajectories. You will have an input box where the user can choose which states to compare (selectInput()) and have a submit button to click once the user has chosen all states they’re interested in comparing. The graph should display a different line for each state, with labels either on the graph or in a legend. Color can be used if needed.
GitHub link
- Below, provide a link to your GitHub page with this set of Weekly Exercises. Specifically, if the name of the file is 05_exercises.Rmd, provide a link to the 05_exercises.md file, which is the one that will be most readable on GitHub. If that file isn’t very readable, then provide a link to your main GitHub page.
https://github.com/ckollmer01/weekly_exercises_5
DID YOU REMEMBER TO UNCOMMENT THE OPTIONS AT THE TOP?
---
title: 'Weekly Exercises #5'
author: "Caedmon Kollmer-Dorsey"
output: 
  html_document:
    keep_md: TRUE
    toc: TRUE
    toc_float: TRUE
    df_print: paged
    code_download: true
---


```{r setup, include=FALSE}
#knitr::opts_chunk$set(echo = TRUE, error=TRUE, message=FALSE, warning=FALSE)
```

```{r libraries}
library(tidyverse)     # for data cleaning and plotting
library(googlesheets4) # for reading googlesheet data
library(lubridate)     # for date manipulation
library(openintro)     # for the abbr2state() function
library(palmerpenguins)# for Palmer penguin data
library(maps)          # for map data
library(ggmap)         # for mapping points on maps
library(gplots)        # for col2hex() function
library(RColorBrewer)  # for color palettes
library(sf)            # for working with spatial data
library(leaflet)       # for highly customizable mapping
library(ggthemes)      # for more themes (including theme_map())
library(plotly)        # for the ggplotly() - basic interactivity
library(gganimate)     # for adding animation layers to ggplots
library(transformr)    # for "tweening" (gganimate)
library(shiny)         # for creating interactive apps
library(gifski)
gs4_deauth()           # Turn off Google authorization so knitting never prompts for a login.
theme_set(theme_minimal()) # Default ggplot2 theme for the plots in this document.
```

```{r data}
# Every dataset below is downloaded when the document is knit. Each URL is
# piped into its reader.

# SNCF train data
small_trains <-
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-02-26/small_trains.csv" %>%
  read_csv()

# Lisa's garden data; `date` is run through ymd() after reading.
garden_harvest <-
  "https://docs.google.com/spreadsheets/d/1DekSazCzKqPS2jnGhKue7tLxRU3GVL1oxi-4bEM5IWw/edit?usp=sharing" %>%
  read_sheet() %>%
  mutate(date = ymd(date))

# Lisa's Mallorca cycling data; only the first four columns plus `speed` are kept.
mallorca_bike_day7 <-
  "https://www.dropbox.com/s/zc6jan4ltmjtvy0/mallorca_bike_day7.csv?dl=1" %>%
  read_csv() %>%
  select(1:4, speed)

# Heather Lendway's Ironman 70.3 Pan Am championships Panama data,
# one file per discipline.
panama_swim <-
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_swim_20160131.csv" %>%
  read_csv()

panama_bike <-
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_bike_20160131.csv" %>%
  read_csv()

panama_run <-
  "https://raw.githubusercontent.com/llendway/gps-data/master/data/panama_run_20160131.csv" %>%
  read_csv()

# COVID-19 data from the New York Times
covid19 <-
  "https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv" %>%
  read_csv()

# Bike-share trip history, read from a remote .rds file through a
# decompressing connection.
data_site <- "https://www.macalester.edu/~dshuman1/data/112/2014-Q4-Trips-History-Data.rds"
Trips <- data_site %>%
  url() %>%
  gzcon() %>%
  readRDS()

```

## Put your homework on GitHub!

Go [here](https://github.com/llendway/github_for_collaboration/blob/master/github_for_collaboration.md) or to previous homework to remind yourself how to get set up. 

Once your repository is created, you should always open your **project** rather than just opening an .Rmd file. You can do that by either clicking on the .Rproj file in your repository folder on your computer. Or, by going to the upper right hand corner in R Studio and clicking the arrow next to where it says Project: (None). You should see your project come up in that list if you've used it recently. You could also go to File --> Open Project and navigate to your .Rproj file. 

## Instructions

* Put your name at the top of the document. 

* **For ALL graphs, you should include appropriate labels.** 

* Feel free to change the default theme, which I currently have set to `theme_minimal()`. 

* Use good coding practice. Read the short sections on good code with [pipes](https://style.tidyverse.org/pipes.html) and [ggplot2](https://style.tidyverse.org/ggplot2.html). **This is part of your grade!**

* **NEW!!** With animated graphs, add `eval=FALSE` to the code chunk that creates the animation and saves it using `anim_save()`. Add another code chunk to reread the gif back into the file. See the [tutorial](https://animation-and-interactivity-in-r.netlify.app/) for help. 

* When you are finished with ALL the exercises, uncomment the options at the top so your document looks nicer. Don't do it before then, or else you might miss some important warnings and messages.

## Warm-up exercises from tutorial

  1. Choose 2 graphs you have created for ANY assignment in this class and add interactivity using the `ggplotly()` function. 
  
```{r}
# Interactive line chart of cumulative COVID-19 cases for Minnesota and its
# neighbouring states.
covid_graph_1 <- covid19 %>%
  filter(state %in% c("Minnesota",
                      "Wisconsin",
                      "Iowa",
                      "North Dakota",
                      "South Dakota")) %>%
  # `cases` is already the cumulative count for each state and date, so it is
  # plotted directly; running cumsum() over it would inflate every value.
  ggplot(aes(x = date, y = cases, color = state)) +
  geom_line() +
  labs(title = "Covid19 Cases Over Time",
       x = "Date",
       y = "Total Cases",
       color = "State")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(covid_graph_1)
```
  
```{r}
# Interactive density plot of trip start times across the day, one panel per
# weekday, with one filled curve per client type.
bike_graph_1 <- Trips %>%
  mutate(
    # Start time as a decimal hour of the day.
    time_of_day = hour(sdate) + minute(sdate) / 60,
    # `label = TRUE` belongs to wday() so the facets are titled with weekday
    # names; outside the call it only created a constant column named `label`.
    week_day = wday(sdate, label = TRUE)
  ) %>%
  ggplot(aes(x = time_of_day)) +
  # A fixed transparency is a setting, not a mapping, so it sits outside aes()
  # and no alpha legend is produced.
  geom_density(aes(fill = client), alpha = 0.5, color = NA) +
  facet_wrap(vars(week_day)) +
  theme(axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.text.x = element_blank()) +
  labs(title = "Trips by Casual and Registered Users over 1 Day",
       x = "Time of Day",
       y = "Proportion of Daily Trips",
       fill = "Client Type")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(bike_graph_1)
```
  
  
  2. Use animation to tell an interesting story with the `small_trains` dataset that contains data from the SNCF (National Society of French Railways). These are Tidy Tuesday data! Read more about it [here](https://github.com/rfordatascience/tidytuesday/tree/master/data/2019/2019-02-26).

I've used animation to look at the proportion of trains from different stations that arrive late at Paris Lyon station, ordered by mean journey time, to see whether longer journeys are more likely to arrive late. The graph is somewhat inconclusive, and fully answering the question could require more knowledge about the local rail system.

```{r, eval = FALSE}
# One row per departure station for trains arriving at PARIS LYON: the mean of
# the reported average journey times and the overall share of trips that
# arrived late.
paris_station_late_arr <- small_trains %>%
  filter(arrival_station == "PARIS LYON") %>%
  group_by(departure_station) %>%
  # summarize() collapses each station to a single row. A grouped mutate()
  # would keep every original row, so each station would be counted once per
  # row when the smoother is fitted.
  summarize(journey_time_avg = mean(journey_time_avg),
            prop_arriving_late = sum(num_arriving_late) / sum(total_num_trips)) %>%
  arrange(desc(journey_time_avg))

# Smoothed late-arrival proportion against journey length, revealed from the
# shortest to the longest journeys.
train_graph <- paris_station_late_arr %>%
  ggplot(aes(x = journey_time_avg,
             y = prop_arriving_late)) +
  geom_smooth() +
  labs(title = "Late Arrivals to Paris-Lyon by Journey Length from Departure Station",
       x = "Mean Journey Length from Departure Station (mins)",
       y = "Proportion of Late Arrivals") +
  transition_reveal(journey_time_avg)

animate(train_graph, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("frenchtrains.gif")
```


## Garden data

  3. In this exercise, you will create a stacked area plot that reveals itself over time (see the `geom_area()` examples [here](https://ggplot2.tidyverse.org/reference/position_stack.html)). You will look at cumulative harvest of tomato varieties over time. You should do the following:
  * From the `garden_harvest` data, filter the data to the tomatoes and find the *daily* harvest in pounds for each variety.  
  * Then, for each variety, find the cumulative harvest in pounds.  
  * Use the data you just made to create a static cumulative harvest area plot, with the areas filled with different colors for each vegetable and arranged (HINT: `fct_reorder()`) from most to least harvested (most on the bottom).  
  * Add animation to reveal the plot over date. 

```{r,eval=FALSE}
# Daily and cumulative tomato harvest in pounds for each variety.
cum_tomato_harvest <- garden_harvest %>%
  filter(vegetable == "tomatoes") %>%
  # Give every variety a row for every harvest date (weight 0 when nothing
  # was picked) so each stacked area spans the full date range.
  complete(variety, date, fill = list(weight = 0)) %>%
  group_by(variety, date) %>%
  # 0.00220462 is pounds per gram (assumes `weight` is recorded in grams).
  # Only the `date` grouping is dropped, so cumsum() below runs per variety.
  summarize(daily_weight_lbs = sum(weight * 0.00220462),
            .groups = "drop_last") %>%
  mutate(cum_weight_lbs = cumsum(daily_weight_lbs)) %>%
  ungroup() %>%
  # Order varieties by their final cumulative total (the maximum) rather than
  # by the default median, so the most-harvested variety is the last level.
  mutate(variety = fct_reorder(variety, cum_weight_lbs, .fun = max))

# Stacked area plot of the cumulative harvest, revealed over the season.
tomato_graph <- cum_tomato_harvest %>%
  ggplot(aes(x = date, y = cum_weight_lbs, fill = variety)) +
  geom_area() +
  labs(title = "Cumulative Harvest of Tomatoes",
       x = "Date",
       y = "Cumulative Weight (lbs)",  # lower-case `y`; `Y` is not an aesthetic
       fill = "Variety") +
  transition_reveal(date)

animate(tomato_graph, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("tomatoes.gif")
```


## Maps, animation, and movement!

  4. Map my `mallorca_bike_day7` bike ride using animation! 
  Requirements:
  * Plot on a map using `ggmap`.  
  * Show "current" location with a red point. 
  * Show path up until the current point.  
  * Color the path according to elevation.  
  * Show the time in the subtitle.  
  * CHALLENGE: use the `ggimage` package and `geom_image` to add a bike image instead of a red point. You can use [this](https://raw.githubusercontent.com/llendway/animation_and_interactivity/master/bike.png) image. See [here](https://goodekat.github.io/presentations/2019-isugg-gganimate-spooky/slides.html#35) for an example. 
  * Add something of your own! And comment on if you prefer this to the static map and why or why not.
  
  
```{r,eval=FALSE}
# Background map tiles for the area of the ride.
# NOTE(review): Stamen-hosted tiles have moved to another provider; confirm
# get_stamenmap() still works with the installed ggmap version.
mallorca_map <- get_stamenmap(
  bbox = c(left = 2.3, bottom = 39.5, right = 2.8, top = 39.8),
  maptype = "terrain",
  zoom = 12
)

# Animated ride: the path builds up over time and is coloured by elevation,
# a red point marks the current location, and the subtitle shows the time.
bike_ride_gif <- ggmap(mallorca_map) +
  geom_path(data = mallorca_bike_day7,
            aes(x = lon, y = lat, color = ele),
            size = 3) +
  # Under transition_reveal() a point layer shows the position at the current
  # frame instead of accumulating like the path does.
  geom_point(data = mallorca_bike_day7,
             aes(x = lon, y = lat),
             color = "red",
             size = 4) +
  scale_color_viridis_c(option = "inferno") +
  labs(title = "Mallorca Bike Ride Elevation Over Time",
       subtitle = "Time: {frame_along}",
       color = "Elevation") +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_reveal(time)

animate(bike_ride_gif, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("bikeride.gif")
```
  
  
  5. In this exercise, you get to meet my sister, Heather! She is a proud Mac grad, currently works as a Data Scientist at 3M where she uses R everyday, and for a few years (while still holding a full-time job) she was a pro triathlete. You are going to map one of her races. The data from each discipline of the Ironman 70.3 Pan Am championships, Panama is in a separate file - `panama_swim`, `panama_bike`, and `panama_run`. Create a similar map to the one you created with my cycling data. You will need to make some small changes: 1. combine the files (HINT: `bind_rows()`), 2. make the leading dot a different color depending on the event (for an extra challenge, make it a different image using `geom_image()`!), 3. CHALLENGE (optional): color by speed, which you will need to compute on your own from the data. You can read Heather's race report [here](https://heatherlendway.com/2016/02/10/ironman-70-3-pan-american-championships-panama-race-report/). She is also in the Macalester Athletics [Hall of Fame](https://athletics.macalester.edu/honors/hall-of-fame/heather-lendway/184) and still has records at the pool. 
  
```{r, eval=FALSE}
# Stack the three disciplines into one data frame; the `event` column tells
# the rows apart.
panama_ironman <- bind_rows(panama_swim, panama_bike, panama_run)

# Background map tiles for the race area.
# NOTE(review): Stamen-hosted tiles have moved to another provider; confirm
# get_stamenmap() still works with the installed ggmap version.
panama_map <- get_stamenmap(
  bbox = c(left = -79.59, bottom = 8.91, right = -79.49, top = 9),
  maptype = "terrain",
  zoom = 13
)

# Animated race route: the path and its leading point are coloured by event,
# and the subtitle shows the time.
ironman_gif <- ggmap(panama_map) +
  geom_path(data = panama_ironman,
            aes(x = lon, y = lat, color = event),
            size = 2) +
  # Under transition_reveal() a point layer shows the position at the current
  # frame instead of accumulating like the path does.
  geom_point(data = panama_ironman,
             aes(x = lon, y = lat, color = event),
             size = 4) +
  labs(title = "Route of Pan Am Ironman Championship",
       subtitle = "Time: {frame_along}",
       color = "Event") +
  theme_map() +
  transition_reveal(time)

animate(ironman_gif, nframes = 100, duration = 8, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("ironman.gif")
```
  
## COVID-19 data

  6. In this exercise, you are going to replicate many of the features in [this](https://aatishb.com/covidtrends/?region=US) visualization by Aatish Bhatia but include all US states. Requirements:
  * Create a new variable that computes the number of new cases in the past week (HINT: use the `lag()` function you've used in a previous set of exercises). Replace missing values with 0's using `replace_na()`.  
  * Filter the data to omit rows where the cumulative case counts are less than 20.  
  * Create a static plot with cumulative cases on the x-axis and new cases in the past 7 days on the y-axis. Connect the points for each state over time. HINTS: use `geom_path()` and add a `group` aesthetic.  Put the x and y axis on the log scale and make the tick labels look nice - `scales::comma` is one option. This plot will look pretty ugly as is.
  * Animate the plot to reveal the pattern by date. Display the date as the subtitle. Add a leading point to each state's line (`geom_point()`) and add the state name as a label (`geom_text()` - you should look at the `check_overlap` argument).  
  * Use the `animate()` function to have 200 frames in your animation and make it 30 seconds long. 
  * Comment on what you observe.
  
  In this graph we can observe that although New York used to have by far the most new weekly cases, Florida and California now have more, with Florida far in the lead.
```{r, eval=FALSE}
# Animated trend plot: cumulative cases (x) against cases added over the
# previous 7 days (y), one path per state, both axes on a log10 scale.
covid_path <- covid19 %>%
  group_by(state) %>%
  # `cases` is the cumulative count, so the new cases of the past week are the
  # difference from the value 7 rows earlier within the state. The first 7
  # rows of a state have nothing to subtract and are set to 0.
  # Assumes one row per state per day.
  mutate(new_cases_7day = replace_na(cases - lag(cases, n = 7, order_by = date), 0)) %>%
  ungroup() %>%
  # Omit rows with fewer than 20 cumulative cases.
  filter(cases >= 20) %>%
  ggplot(aes(x = cases, y = new_cases_7day, color = state)) +
  geom_path() +
  geom_point(size = 2) +
  geom_text(aes(label = state),
            check_overlap = TRUE) +
  scale_x_log10(labels = scales::comma) +
  scale_y_log10(labels = scales::comma) +
  labs(title = "COVID19 Cases by State",
       subtitle = "Date: {frame_along}",
       x = "Cumulative cases",
       y = "New cases in the past 7 days") +
  # The state names are drawn on the plot, so the colour legend is dropped.
  theme(legend.position = "none") +
  transition_reveal(date)

animate(covid_path, nframes = 200, duration = 30, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("covid_path.gif")
```
  
  
  7. In this exercise you will animate a map of the US, showing how cumulative COVID-19 cases per 10,000 residents has changed over time. This is similar to exercises 11 & 12 from the previous exercises, with the added animation! So, in the end, you should have something like the static map you made there, but animated over all the days. Put date in the subtitle. Comment on what you see.
  
  In this map you can see that although Washington was the first state with a confirmed case, first New York and then Louisiana became the centers of the pandemic, with the center of the country rapidly increasing thereafter.
```{r, eval=FALSE}
# 2018 state population estimates. separate() splits off whatever precedes the
# first non-alphanumeric character of `state` into a throwaway `dot` column
# (presumably the names carry a leading "." -- verify against the file);
# `extra = "merge"` keeps multi-word names in one piece. Names are lower-cased
# to match the map data.
census_pop_est_2018 <- read_csv("https://www.dropbox.com/s/6txwv3b4ng7pepe/us_census_2018_state_pop_est.csv?dl=1") %>%
  separate(state, into = c("dot", "state"), extra = "merge") %>%
  select(-dot) %>%
  mutate(state = str_to_lower(state))

# State outlines; geom_map() matches them to the data through `map_id`.
us_map <- map_data("state")

# Animated choropleth of cumulative cases per 10,000 residents, one animation
# state per Friday.
covid_map <- covid19 %>%
  group_by(state) %>%
  # Fill in any missing days within each state's own date range. The grouping
  # column is not passed to complete(): the data is already grouped by it, and
  # tidyr 1.2.0 and later reject completing a grouping column.
  # Added days have NA `cases`.
  complete(date = seq.Date(min(date), max(date), by = "day")) %>%
  ungroup() %>%
  # One frame per week keeps the number of animation states manageable.
  filter(wday(date, label = TRUE) == "Fri") %>%
  mutate(state = str_to_lower(state)) %>%
  # Keep only the states present in the census table.
  right_join(census_pop_est_2018,
             by = "state") %>%
  mutate(cases_per_10000 = cases / est_pop_2018 * 10000) %>%
  ggplot() +
  geom_map(aes(map_id = state,
               fill = cases_per_10000,
               group = date),
           map = us_map) +
  # geom_map() does not set axis limits itself, so take them from the map.
  expand_limits(x = us_map$long, y = us_map$lat) +
  scale_fill_viridis_c(option = "viridis") +
  labs(title = "Covid Cases per 10,000",
       subtitle = "Date: {closest_state}",
       fill = "Cases") +
  theme_map() +
  theme(legend.background = element_blank()) +
  transition_states(date)

animate(covid_map, nframes = 100, duration = 30, renderer = gifski_renderer())

# With no animation supplied, anim_save() writes the most recently rendered one.
anim_save("covid_map.gif")
```
  

## Your first `shiny` app

  8. This app will also use the COVID data. Make sure you load that data and all the libraries you need in the `app.R` file you create. Below, you will post a link to the app that you publish on shinyapps.io. You will create an app to compare states' cumulative number of COVID cases over time. The x-axis will be number of days since 20+ cases and the y-axis will be cumulative cases on the log scale (`scale_y_log10()`). We use number of days since 20+ cases on the x-axis so we can make better comparisons of the curve trajectories. You will have an input box where the user can choose which states to compare (`selectInput()`) and have a submit button to click once the user has chosen all states they're interested in comparing. The graph should display a different line for each state, with labels either on the graph or in a legend. Color can be used if needed. 
  
## GitHub link

  9. Below, provide a link to your GitHub page with this set of Weekly Exercises. Specifically, if the name of the file is 05_exercises.Rmd, provide a link to the 05_exercises.md file, which is the one that will be most readable on GitHub. If that file isn't very readable, then provide a link to your main GitHub page.

https://github.com/ckollmer01/weekly_exercises_5

**DID YOU REMEMBER TO UNCOMMENT THE OPTIONS AT THE TOP?**
